Definitions:
OtherExpendituresAmt: Other expenditures for facilities
and programslibrary(tidyverse)
library(kableExtra)
library(scales)
library(plotly)
library(glue)
library(here)
library(viridis)
library(ggrepel)
all_plots <- FALSE
m <- list(
l = 50,
r = 50,
b = 200,
t = 150,
pad = 0.5
)
theme_c <- function(...){
# font <- "Helvetica" #assign font family up front
theme_bw() %+replace% #replace elements we want to change
theme(
#text elements
plot.title = element_text( #title
size = 14, #set font size
face = 'bold', #bold typeface
hjust = .5,
vjust = 3),
plot.subtitle = element_text( #subtitle
size = 12,
hjust = .5,
face = 'italic',
vjust = 3), #font size
axis.title = element_text( #axis titles
size =14), #font size
axis.text.x = element_text( #axis text
size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 11, face="bold"),
# t, r, b, l
plot.margin = unit(c(1,.5,.5,.5), "cm"),
legend.position = "right",
strip.text.x = element_text(size = 18, face = "bold", color="white"),
strip.text.y = element_text(size = 18, face = "bold", color="white"),
strip.background = element_rect(fill = "#3E3D3D")
) %+replace%
theme(...)
}
# load companies file of EIN to name and endowment data
companies_to_ein <- readRDS(here("data", "companies.RDS"))
endowment_data <- read_rds(here("data",
"endowments_by_most_recent_filings.RDS")) %>%
select(-c(EndowmentsHeldUnrelatedOrgInd, EndowmentsHeldRelatedOrgInd)) %>%
pivot_longer(-c(EIN, fiscal_year),
names_to = "variable_name") %>%
left_join(companies_to_ein) %>%
mutate(fiscal_year=as.numeric(paste(fiscal_year)),
organization_name = ifelse(is.na(organization_name),
EIN, organization_name))
# extract return dates
source(here("GET_VARS.R"))
files <- dir(here("ballet_990_released_20230208"),
full.names = TRUE)
dates <- map_df(files,
~get_df(filename = .x,
variables = c("//Return//ReturnHeader//TaxPeriodEndDt"))) %>%
mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
filter_ein()
saveRDS(dates, here('data', 'dates.RDS'))
dates <- readRDS( here('data', 'dates.RDS')) %>%
select(EIN, TaxPeriodEndDt, fiscal_year)
endowment_data <- endowment_data %>%
mutate(fiscal_year=as.numeric(paste(fiscal_year))) %>%
left_join(dates)
endowment_data_wide <- endowment_data %>%
pivot_wider(names_from=variable_name,
values_from=value)
# function to plot variables of interest against each other
plot_ranks <- function(var1, var2, data) {
plt <- data %>%
group_by(fiscal_year) %>%
# arrange(var1) %>%
mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
# arrange(var2) %>%
mutate("{var2}_rank" := rank(-!!sym(var2)), na.last = "keep") %>%
ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
color = organization_name,
label =EIN
)) +
geom_point() +
geom_function(fun=function(x)x,color="darkred", alpha = .8) +
labs(x = paste0(var1, " Rank"),
y = paste0(var2, " Rank")) +
theme_bw() +
labs(title = glue("Rank of {var2} vs. Rank of {var1}")) +
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
facet_wrap(~fiscal_year)+
theme(plot.title = element_text(size = 14,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 14),
axis.title = element_text(size = 13,
face = "bold"))
ggplotly(plt, margin = m, height = 550) %>%
partial_bundle()
}
plot_ranks_by_consistency <- function(var1, var2, data) {
plt <- data %>%
filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
group_by(fiscal_year) %>%
# arrange(var1) %>%
mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
# arrange(var2) %>%
mutate("{var2}_rank" := rank(-!!sym(var2)), na.last = "keep") %>%
mutate(rank_diff = !!sym(glue("{var2}_rank")) - !!sym(glue("{var1}_rank" ))) %>%
group_by(EIN) %>%
mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
sum_neg = sum(rank_diff < 0, na.rm=TRUE),
sum_zero = sum(rank_diff ==0, na.rm=TRUE))%>%
ungroup() %>%
mutate(prop_positive = sum_pos / (sum_pos + sum_neg + sum_zero)) %>%
ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
color = prop_positive,
group =organization_name
)) +
geom_function(fun=function(x)x,color="darkred", alpha = .8, n =201) +
geom_point() +
labs(x = paste0(var1, " Rank"),
y = paste0(var2, " Rank"),
title = glue("Rank of {var2} vs. Rank of {var1}"),
color = glue("Proportion of Years\nWhere {var1}\nRanked Higher than\n{var2}")) +
theme_c(legend.text=element_text(size =8)) +
scale_color_gradient2(high="#5935CF", low="#D18E01", mid="#E2E2E2", limits=c(0,1), midpoint=.5) +
facet_wrap(~fiscal_year, ncol=5)+
theme(plot.title = element_text(size = 12,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 14),
axis.title = element_text(size = 13,
face = "bold")) +
scale_x_reverse() +
scale_y_reverse()
return(plt)
}
vars <- unique(endowment_data$variable_name)[!grepl("EOY|Admin|Grants", unique(endowment_data$variable_name))]
# pairwise combinations of variables
variable_combinations <- t(combn(vars, 2)) %>%
as.data.frame()
if (!all_plots) variable_combinations <- variable_combinations[1:4,]
#######################################
# CREATE BIG PALETTE WITH MANY COLORS
#######################################
pal1 <- viridis_pal(option="mako", end = .8)(12)
pal2 <- viridis_pal(option="rocket", end = .8)(12)
pal3 <- viridis_pal(option="magma", end = .8)(12)
pal4 <- viridis_pal(option="inferno", end = .8)(12)
pal <- c(pal1, pal2, pal3, pal4)
indexes <- sample.int(length(pal), replace=FALSE)
pal <- pal[indexes]
###################################################################################################
# table of organization that had endowment in single category for all years on file
###################################################################################################
endowment_data_wide %>%
select(contains("EOY"),fiscal_year,
EIN, organization_name) %>%
group_by(organization_name) %>%
summarize(across(contains("EOY"),~ mean(.x, na.rm=TRUE))) %>%
filter(if_any(contains("EOY"), ~.x==1 )) %>%
pivot_longer(contains("EOY")) %>%
mutate(name = case_when(
name == "TermEndowmentBalanceEOYPct" ~ "Temporarily restricted endowment",
name == "PrmnntEndowmentBalanceEOYPct" ~ "Permanent endowment",
name == "BoardDesignatedBalanceEOYPct" ~ "Board designated or quasi-endowment"
)) %>%
filter(!is.na(value) & value !=0) %>%
group_by(name) %>%
summarize(`Organization Name` = paste0(organization_name, collapse = "\n")) %>%
select(`Endowment Type` = name,
`Organization Name`) %>%
kbl() %>%
row_spec(row=0, color="white", background="#3E3D3D")
| Endowment Type | Organization Name |
|---|---|
| Board designated or quasi-endowment | Ballet Quad Cities Canyon Concert Ballet The Tallahassee Ballet |
| Permanent endowment | American Repertory Ballet Aspen Santa Fe Ballet Ballet West BalletMet Colorado Ballet Dance Theatre of Harlem Madison Ballet New Mexico Ballet Company Oregon Ballet Theatre Orlando Ballet Pittsburgh Ballet Theatre |
| Temporarily restricted endowment | Ballet Des Moines First State Ballet Theatre |
###################################################################################################
# split labels into 2 groups, one labeled on earliest date, one on latest
###################################################################################################
set.seed(999)
all_eins <- unique(endowment_data_wide$EIN)
all_eins <- endowment_data_wide %>%
# arrange(desc(BeginningYearBalanceAmt)) %>%
# select(EIN, organization_name) %>%
# distinct() %>%
pull(EIN) %>%
unique()
indexes_all <- 1:length(all_eins)
first <- sample.int(floor(length(all_eins)/2), replace=FALSE)
ein_beginning <- all_eins[first]
ein_end <- all_eins[indexes_all[!indexes_all %in% first]]
ein_end = all_eins[indexes_all]
ranks <- endowment_data_wide %>%
filter(fiscal_year <= 2020) %>%
group_by(organization_name) %>%
summarize(fiscal_year = max(fiscal_year),
BeginningYearBalanceAmt = BeginningYearBalanceAmt[which.max(fiscal_year)]) %>%
mutate(rank = rank(-BeginningYearBalanceAmt))
top_20 <- ranks %>% filter(rank <= 20)
not_top_20 <- ranks %>% filter(rank > 20)
dat <- endowment_data_wide %>%
select(contains("EOY"),fiscal_year,
EIN, organization_name) %>%
mutate(rank_category = ifelse(organization_name %in% top_20$organization_name,
"Endowment Ranked in Top 20",
"Endowment Not Ranked in Top 20")) %>%
pivot_longer(cols = contains("EOY")) %>%
mutate(name = case_when(
name == "TermEndowmentBalanceEOYPct" ~ "Temporarily restricted endowment",
name == "PrmnntEndowmentBalanceEOYPct" ~ "Permanent endowment",
name == "BoardDesignatedBalanceEOYPct" ~ "Board designated or quasi-endowment"
)) %>%
filter(!is.na(value)) %>%
group_by(organization_name, name) %>%
mutate(xlabel = ifelse(EIN %in% ein_beginning,
min(fiscal_year),
max(fiscal_year)),
value=100*value,
ylabel = ifelse(EIN %in% ein_beginning,
value[which.min(fiscal_year)],
value[which.max(fiscal_year)]) )
dat_labels <- dat %>%
select(organization_name,xlabel,ylabel,name,rank_category) %>%
distinct()
dat %>%
ggplot(aes(x=fiscal_year,
y = value,
color = organization_name)) +
geom_line(show.legend=FALSE,
alpha = .95) +
facet_wrap(~name) +
theme_c(strip.text = element_text(margin =margin(3,0,25,0),
size =4,
lineheight=.5),
strip.text.y = element_text(angle=0, color="white", size =18),
legend.position="none") +
scale_color_manual(values=pal) +
geom_point(size=.5, alpha =.5) +
labs(y="Percentage of Endowment in Category",
x = "Fiscal Year") +
geom_label_repel(aes(label = organization_name,
y = ylabel,
x = xlabel,
color=organization_name),
size = 2,
seed=124,
data=dat_labels,
min.segment.length=1,
label.size = 1.2,
force=.8,
# direction="y",
force_pull = 8,
max.overlaps = 20) +
geom_label_repel(aes(label = organization_name,
color=organization_name,
y = ylabel, x = xlabel),
color = "black",
label.size = NA,
size = 2,
seed=124,
data=dat_labels,
min.segment.length=1,
force=.8,
# direction="y",
force_pull=8,
max.overlaps = 20) +
facet_grid(rank_category~name)
# plotlist <- pmap(variable_combinations, ~{
# #plt <- plot_ranks(var1 = .x, var2 = .y, data = endowment_data_wide)
# plt <- plot_ranks_by_consistency(var1 = .x, var2 = .y, data = endowment_data_wide) %>% partial_bundle()
#
# }
# )
plot_ranks_by_consistency("BeginningYearBalanceAmt",
"ContributionsAmt",
data = endowment_data_wide) +
labs(title = "Rank of Endowment Beginning of Year Balance versus the Rank of Contributions",
y = "Rank of Contributions",
x = "Rank of Beginning of Year Balance") +
theme(axis.text.x = element_text(size =9),
axis.text.y= element_text(size =9),
strip.text=element_text(size = 16, color = "white"),
plot.title=element_text(hjust = .5, face="bold", size = 18))
plot_ranks_by_consistency("BeginningYearBalanceAmt",
"OtherExpendituresAmt",
data = endowment_data_wide)+
labs(title = "Rank of Beginning of Year Balance versus the Rank of Other Expenditures",
y = "Rank of Other Expenditures",
x = "Rank of Beginning of Year Balance") +
theme(axis.text.x = element_text(size =9),
axis.text.y= element_text(size =9),
strip.text=element_text(size = 16, color = "white"),
plot.title=element_text(hjust = .5, face="bold", size = 18))
#
#
# rankings_over_time <- endowment_data_wide %>%
# filter(fiscal_year >=2011 & fiscal_year <=2020) %>%
# group_by(fiscal_year) %>%
# mutate(rank = rank(-BeginningYearBalanceAmt, na.last = "keep")) %>%
# filter(!is.na(rank)) %>%
# ggplot(aes(x=fiscal_year, y = rank, color =organization_name)) +
# geom_line() +
# scale_color_viridis(option = "rocket", discrete=TRUE) +
# theme_c()
#
# ggplotly(rankings_over_time, margin = m, height = 550)
plot_ranks_over_time <- function(dat,var) {
plt <- dat %>%
filter(fiscal_year >=2011 & fiscal_year <=2020) %>%
group_by(fiscal_year) %>%
mutate(rank = rank(-!!sym(var), na.last = "keep")) %>%
filter(!is.na(rank)) %>%
ggplot(aes(x=fiscal_year, y = rank, color =organization_name)) +
geom_line() +
geom_point(alpha = .3, size =.5) +
scale_color_viridis(option = "rocket", discrete=TRUE) +
theme_c() +
scale_y_continuous(n.breaks = 10) +
scale_x_continuous(breaks=2011:2020) +
labs(y = paste("Rank of ", var),
x = "Fiscal Year",
title = paste0("Rank over time for ", var))
ggplotly(plt, margin = m, height = 550)
}
plotlist <- map(vars,~ {
plt<- plot_ranks_over_time(endowment_data_wide,.x) %>% partial_bundle()
})
htmltools::tagList(setNames(plotlist, NULL))